fb <- fb %>%
mutate(edu = str_trim(edu, side = "right"))
fb$edu_relevel <- factor(fb$edu, levels = c("No formal schooling","Informal schooling only (including Koranic schooling)",
"Some primary schooling","Primary school completed",
"Intermediate school or Some secondary school / high school",
"Secondary school / high school completed",
"Post-secondary qualifications, other than university e.g. a diploma or degree from a polytechnic or college",
"Some university","University completed","Post-graduate"))
fb$edu_num <- as.numeric(fb$edu_relevel)
# prepare data for weighting
# define n of fb sample
fbN <-nrow(fb) #1528
## get population numbers --> frequencies of >18 pop
# 18-29
# 30-49
# 50-59
# 60+
## age ##
total18pop <- V3_T2.2 %>%
filter(
!Age %in% c("Total","0-4","5-9","10-14","15-19","20-24","25-29",
"30-34","35-39","40-44","45-49","50-54","55-59","60-64",
"65-69","70-74","75-79","80-84","85-89","90-94","95-99","NotStated")
) %>%
mutate(Age=dplyr::recode(Age, "100+"="100"),
Age = as.numeric(Age)) %>%
summarise(
total18 = sum(Total[Age>=18] - 687), # subtract not stated ages from total
male = sum(Male[Age>=18]),
female = sum(Female[Age>=18]),
age1829 = sum(Total[Age>=18 & Age<=29]),
age3049 = sum(Total[Age>=30 & Age<=49]),
age5059 = sum(Total[Age>=50 & Age<=59]),
age60 = sum(Total[Age>=60])
)
# calculate proportion of pop * fb sample size
total18plus <- total18pop[1]
pop.prop <- (total18pop/as.numeric(total18plus))
pop.prop.fbn <- (total18pop/as.numeric(total18plus))*fbN
# urban pop > 18yo
urban <- V3_T2.2b %>% filter(Age!="Total",
Age!="0 - 4",
Age!="5-9",
Age!="10-14",
Age!="15-19",
Age!="20-24",
Age!="25-29",
Age!="30-34",
Age!="35-39",
Age!="40-44",
Age!="45-49",
Age!="50-54",
Age!="55-59",
Age!="60-64",
Age!="65-69",
Age!="70-74",
Age!="75-79",
Age!="80-84",
Age!="85-89",
Age!="90-94",
Age!="95-99",
Age!="NotStated",
Age!="Not Stated") %>%
mutate(Age=dplyr::recode(Age, "100+"="100"),
Age = as.numeric(Age))%>%
summarise(
urban = sum(Total[Age>=18])
)
urban.prop <- urban/total18plus
rural.prop <- 1-urban.prop
## tribe ##
# want % luo and % kikuyu
# retrieved from file: "census_tables_cleaned_VOLUME IV KHPC 2019.xls" (tab 2.31)
luo <- 5066966/47067376
kikuyu <- 8148668/47067376
tribe.prop <- cbind(luo, kikuyu)
## religion ##
# want % Christian and % Muslim and % No religion
# retrieved from file: "census_tables_cleaned_VOLUME IV KHPC 2019.xls" (tab 2.30)
totalrel <- 47213282 - 73253 - 6909 # total - dont knows - not stated
christian <- (9726169 +	15777473 + 9648690 + 3292573 + 201263 + 1732911)/totalrel
muslim <- 5152194/totalrel
norel <- 755750/totalrel
religion.prop <- cbind(christian, muslim, norel)
## education ##
#View(V4_T2.5) # Distribution of Population Aged 3 Years and Above by Highest Level of Education Completed
# CATEGORIES: “Didn’t complete secondary school,” “Secondary degree,” and “College degree or higher"
totaledu <- 36212477 - (101326 + 9132) # minus DK / not stated
nonsec <- 3616843 + 18341098 + 51996 # preprimary + primary + madrasa
secondary <- 9787320 + 2755273 #includes technical
collegeplus <- 1511943
edu.prop <- cbind(nonsec,secondary,collegeplus)/totaledu
## create tables of frequencies for each variable ##
pop.all <- cbind(pop.prop[-1],urban.prop, religion.prop, tribe.prop, edu.prop)
# census table of demographics #
dem.cens <- pop.all[c(2:15)]
names(dem.cens)[2:5] <- c("age1","age2","age3","age4")
dem.cens <- as.data.frame(t(dem.cens))
names(dem.cens)[1] <- "mean"
dem.cens$variable <- rownames(dem.cens)
dem.cens$se <- NA
dem.cens$ci.low <- NA
dem.cens$ci.high <- NA
dem.cens$sample <- "Census"
## create frequencies for census categories for weighting ##
sex_dist <- tibble(sex = c("Male", "Female"), Freq = c(round((1-pop.prop$female)*fbN),round(pop.prop$female*fbN)))
age_dist4 <- setDT(round(as.data.frame(t(pop.prop[4:7])*fbN)), keep.rownames = TRUE)[]
names(age_dist4) <- c("agegroup4","Freq")
urbrur_dist <- setDT(as.data.frame(t(cbind(round(urban.prop*fbN),round(rural.prop*fbN)))), keep.rownames = TRUE)[]
names(urbrur_dist) <- c("urbrur","Freq")
urbrur_dist$urbrur[2] <- "rural"
edu_dist <- setDT(round(as.data.frame(t(edu.prop*fbN))), keep.rownames = TRUE)[]
names(edu_dist) <- c("edu3","Freq")
## RUN RAKING ##
## create weights for fb data ##
## sex
summary(factor(fb$sex))
## age
fb <- fb %>%
mutate(
#agegroup2 = ifelse(age<32,"under32","over32"),
agegroup4 = ifelse(age<30,"age1829",
ifelse(age>=30 & age<50, "age3049",
ifelse(age>=50 & age<60, "age5059","age60")))
)
## define urbrur variable in survey data ##
table(fb$NEW_rururb_self)
# do most ppl who live 20-30 min from town say it's rural or urban?
table(fb[fb$ride_town=="20-30 minutes",]$NEW_rururb_self)
# 73 = rural, 41 = urban
# same coding previously
table(fb$ride_town)
fb <- fb %>%
filter(ride_town!="-99") %>%
mutate(
urbrur = ifelse(ride_town %in% c("20-30 minutes","30-60 minutes","more than 60 minutes"), "rural","urban")
)
table(fb$urbrur)
## education -- 3 categories: less than sec, secondary/technical, university+
fb <- fb %>% mutate(
edu_3 = ifelse(edu_relevel %in% c("No formal schooling","Informal schooling only (including Koranic schooling)","Some primary schooling","Primary school completed","Intermediate school or Some secondary school / high school"), "nonsec",
ifelse(edu_relevel %in% c("Secondary school / high school completed","Post-secondary qualifications, other than university e.g. a diploma or degree from a polytechnic or college","Some university"), "secondary","collegeplus")
)
)
fb$edu3 <- factor(fb$edu_3, levels = c("nonsec","secondary","collegeplus"))
## create survey design object with no clusters and unit weights, from survey sample
fb_unweighted <- svydesign(ids=~0,data=fb)
## create raked weights
rake_agesexgeog <- rake(design = fb_unweighted, ## survey design object from above
sample.margins = list(~sex, ~urbrur, ~edu3, ~agegroup4), #, ## names of weighting variables in dataset
population.margins = list(sex_dist, urbrur_dist, edu_dist, age_dist4))
# add weights to fb dataset
fb$weight<-weights(rake_agesexgeog)
## examine weights
plot(density(weights(rake_agesexgeog)))
quantile(weights(rake_agesexgeog),probs=c(.05,.5,.95))
length(weights(rake_agesexgeog)[weights(rake_agesexgeog)>quantile(weights(rake_agesexgeog),probs=c(.95))])
## 71 people with weights above the 95 percentile
# trim weights
rake.trim <- trimWeights(rake_agesexgeog, lower=-Inf, #quantile(weights(rake_agesexgeog),probs=c(.05)),
upper=quantile(weights(rake_agesexgeog),probs=c(.95)),strict=TRUE)
fb$weight_trimmed<-weights(rake.trim)
# min = .6674, max =  2.456, mean = 0.897
## Save survey data set with weights ##
View(fb)
## Save survey data set with weights ##
write_csv(fb,file="data sets/demos_kenya_facebook_cleaned.csv")
## Save survey data set with weights ##
write_csv(fb,file="data sets/kenya_facebook_cleaned.csv")
#      Creating data table for DEMOGRAPHIC figure (FIGURE 2) ####
# define demographic variables of interest
dem.vars <- c("female","age1","age2","age3","age4", # ages1-4: "18-29","30-49","50-59","60+",
"christian","muslim","norel",
"nonsec","secondary", "collegeplus",
"luo","kikuyu",
"urban")
fb$female <- ifelse(fb$sex == "Female",1,0)
fb$age1 <- ifelse(fb$age %in% c(18:29),1,0)
fb$age2 <- ifelse(fb$age %in% c(30:49),1,0)
fb$age3 <- ifelse(fb$age %in% c(50:59),1,0)
fb$age4 <- ifelse(fb$age >= 60,1,0)
fb$christian <- ifelse(fb$religion == "Christian",1,
ifelse(fb$religion %in% c(-99),NA,0))
fb$muslim <- ifelse(fb$religion == "Muslim",1,
ifelse(fb$religion %in% c(-99),NA,0))
fb$norel <- ifelse(fb$religion == "None",1,
ifelse(fb$religion %in% c(-99),NA,0))
fb$nonsec <- ifelse(fb$edu3 == "nonsec",1,0)
fb$secondary <- ifelse(fb$edu3 == "secondary",1,0)
fb$collegeplus <- ifelse(fb$edu3 == "collegeplus",1,0)
fb$luo <- ifelse(fb$tribe == "Luo",1,
ifelse(fb$tribe == -99,NA,0))
fb$kikuyu <- ifelse(fb$tribe == "Kikuyu",1,
ifelse(fb$tribe == -99,NA,0))
fb$urban <- ifelse(fb$urbrur=="urban",1,0)
# function to get means/std devs (raw and weighted) for demographic variables
fb.dems <- fb %>%
select(all_of(dem.vars)) %>%
map_df(~ tibble(
mean = my.mean(.x),
se = my.se(.x),
ci.low = mean - 1.96*se,
ci.high = mean + 1.96*se
), .id = "variable")
fb.dems$sample <- "Facebook Sample"
calculate_weighted_stats <- function(data, var_names, weight_col, ids = ~1) {
# Create the survey design
design <- svydesign(ids = ids, data = data, weights = data[[weight_col]])
# Initialize an empty data frame to store results
results <- tibble(variable = character(), weighted_mean = numeric(), weighted_se = numeric(),
ci.low = numeric(), ci.high = numeric())
# Loop through each variable and calculate weighted mean and SE
for (var in var_names) {
# Calculate weighted mean and SE
mean_se <- svymean(as.formula(paste0("~", var)), design, na.rm = TRUE)
# Extract results and add to the data frame
results <- results %>%
add_row(variable = var,
weighted_mean = coef(mean_se),
weighted_se = SE(mean_se),
ci.low = weighted_mean - 1.96*weighted_se,
ci.high = weighted_mean + 1.96*weighted_se)
}
names(results) <- c("variable","mean","se","ci.low","ci.high")
return(results)
}
fb.dems.weighted <- calculate_weighted_stats(fb, dem.vars, "weight_trimmed")
fb.dems.weighted$sample <- "Facebook Sample (weighted)"
fb.dems.all <- rbind(fb.dems, fb.dems.weighted)
# female
ab$female <- ifelse(ab$Q101==2,1,0)
# 18-29, 30-49, 50-59, 60+
ab$age <- as.numeric(ab$Q1)
ab$age1 <- ifelse(ab$age %in% c(18:29),1,0)
ab$age2 <- ifelse(ab$age %in% c(30:49),1,0)
ab$age3 <- ifelse(ab$age %in% c(50:59),1,0)
ab$age4 <- ifelse(ab$age >= 60,1,0)
# christian
ab$christian <- ifelse(ab$Q98A %in% c(1:17,30:33),1,
ifelse(ab$Q98A %in% c(9995,9998,9999,-1),NA,0))
# muslim
ab$muslim <- ifelse(ab$Q98A %in% c(18:24),1,
ifelse(ab$Q98A %in% c(9995,9998,9999,-1),NA,0))
# no religion
ab$norel <- ifelse(ab$Q98A == 0,1,
ifelse(ab$Q98A %in% c(9995,9998,9999,-1),NA,0))
# education - 3 categories
# some - but not completed coded in prior category (e.g. some secondary included in nonsec, some college included in secondary)
ab$edu <- ifelse(ab$Q97 %in% c(-1,98), NA, ab$Q97)
ab$nonsec <- ifelse(ab$edu<5,1,0)
ab$secondary <- ifelse(ab$edu %in% c(5:7),1,0)
ab$collegeplus <- ifelse(ab$edu>7,1,0)
# luo
ab$luo <- ifelse(ab$Q81 == 301,1,
ifelse(ab$Q81 > 9990, NA,0))
# kikuyu
ab$kikuyu <- ifelse(ab$Q81 == 300,1,
ifelse(ab$Q81 > 9990, NA,0))
# urban
ab$urban <- ifelse(ab$URBRUR==1,1,0)
# calculate ab demographic mean/se
ab.dems <- ab %>%
select(all_of(dem.vars)) %>%
map_df(~ tibble(
mean = my.mean(.x),
se = my.se(.x),
ci.low = mean - 1.96*se,
ci.high = mean + 1.96*se
), .id = "variable")
ab.dems$sample <- "AB Survey"
##  calculate weighted average for Afrobarometer data ##
AB_weighted <- calculate_weighted_stats(data = ab,var_names = dem.vars,weight_col = "withinwt_hh", ids = ~1)
AB_weighted$sample <- "AB (weighted)"
all.dems <- bind_rows(dem.cens, as.data.frame(fb.dems.all), as.data.frame(ab.dems), as.data.frame(AB_weighted))
rownames(all.dems) <- NULL
## output data for figure 2: demographic comparisons ####
write.csv(all.dems,file=paste0(data_exports,"fig2_demos_kenya.csv"))
## ads by gender
fb <- fb %>%
mutate(
femalead = ifelse(grepl("_f",location),1,0),
malead = ifelse(grepl("_m",location),1,0),
sexadright = ifelse(grepl("_f",location) & sex=="Female" |
grepl("_m",location) & sex=="Male",
1,0)
)
sum(fb$sexadright)/sum(fb$femalead==1 | fb$malead==1)
## ads by age
fb <- fb %>%
mutate(
over32ad = ifelse(grepl("old",location),1,0),
oldadright = ifelse(grepl("old",location) & age>=32,1,0)
)
oldad <- fb[fb$over32ad==1,]
table(oldad$oldadright)/nrow(oldad)
## ads by education (not specified)
fb <- fb %>%
mutate(
unspeceduad = ifelse(grepl("eduna",location),1,0),
eduadright = ifelse(grepl("eduna",location) & edu_num <= 5,1,0)
)
eduad <- fb[fb$unspeceduad==1,]
table(eduad$eduadright)/nrow(eduad)
## ads by location ##
# import shapefiles for spatial data #
counties_shp <- st_read('gis/kenya_counties/County.shp')
regions_shp  <- st_read('gis/kenya_region_shapefile/kenya_region_shapefile.shp')
towns_shp    <- st_read('gis/kenya_all_towns/kenya_all_towns.shp')
# spatial join to know which counties in which regions
counties_shp <- st_transform(counties_shp, st_crs(regions_shp))
# Perform a spatial intersection to get overlapping geometries between counties and regions
intersection <- st_intersection(counties_shp, regions_shp)
# Calculate the area of each intersection (part of county that overlaps with a region)
intersection <- intersection %>%
mutate(intersection_area = st_area(.))
# Group by county and region to find the largest intersection area per county
county_region_areas <- intersection %>%
group_by(county_id = COUNTY, region_name = name_1) %>%
summarize(total_intersection_area = sum(intersection_area), .groups = "drop")
# For each county, select the region with the largest intersection area
counties_with_regions <- county_region_areas %>%
group_by(county_id) %>%
slice_max(order_by = total_intersection_area, n = 1) %>%
ungroup()
# this is county - region pairs
county_region <- as.data.frame(counties_with_regions)[,c("county_id","region_name")]
# clean county names
county_region <- county_region %>%
mutate(county_id = str_replace_all(str_to_lower(county_id), "[[:punct:][:space:]]", "")) %>%
mutate(region_name = str_replace_all(str_to_lower(region_name), "[[:punct:][:space:]]", "")) %>%
mutate(county_id = str_replace_all(county_id, c("keiyomarakwet" = "elgeyomarakwet",
"tharaka" = "tharakanithi")))
# read coordinates of kmeans targets used in FB ads
ab_targets <- read.csv("data sets/afrobarometer_kmeans_targets.csv")
# Convert to an sf object, specifying the latitude and longitude columns as coordinates
ab_targets <- st_as_sf(ab_targets, coords = c("longitude", "latitude"), crs = 4326)  # 4326 is the EPSG code for WGS84
# get counties for each coordinate used for ads
# Ensure both layers are using the same Coordinate Reference System (CRS)
ab_targets <- st_transform(ab_targets, st_crs(regions_shp))
# Perform a spatial join to identify which county each point falls into
points_in_counties <- st_join(ab_targets, regions_shp, join = st_within)
kmeans_region <- as.data.frame(points_in_counties)[,c("kmeans_joint","name_1")]
kmeans_region <- kmeans_region %>%
mutate(name_1 = str_replace_all(tolower(name_1), " ", "")) %>%
distinct(kmeans_joint, name_1)
wide_kmeans_regions <- kmeans_region %>%
group_by(kmeans_joint) %>%
mutate(region_number = paste0("region", row_number())) %>%  # Assign region1, region2, etc.
pivot_wider(names_from = region_number, values_from = name_1)
# define regions
regions <- county_region$region_name
# Create new columns for region and kmeans_number
fb <- fb %>%
mutate(
# Extract region if it matches one of the specified regions, otherwise NA
ad_region = ifelse(str_extract(location, paste(regions, collapse = "|")) %in% regions,
str_extract(location, paste(regions, collapse = "|")),
NA),
# Extract the kmeans number if it matches the "kmeans_" pattern, otherwise NA
kmeans_number = ifelse(str_detect(location, "kmeans_"),
str_extract(location, "kmeans_\\d+"),
NA)
)
# map kmeans_number to regions using kmeans_region data
# Convert kmeans_number to numeric for joining
fb <- fb %>%
mutate(kmeans_number = as.numeric(str_replace(kmeans_number, "kmeans_", "")))
# Join fb with kmeans_region to get the region associated with each kmeans_number
fb <- fb %>%
left_join(wide_kmeans_regions, by = c("kmeans_number" = "kmeans_joint"))
# collect self-reported region from county #
sum(fb[,c("county")]==fb[,c("county_most")]) # 80% of respondents report the county they live in == county they spend most time
# using county they say they live in "county" -- compare match btw self-reported county-region and ad-targeted region
fb <- fb %>%
mutate(county = county %>%
str_remove("(?i)county") %>%             # Remove the word "county" (case-insensitive)
str_to_lower() %>%                       # Convert to lowercase
str_replace_all("[[:punct:][:space:]]", "")  # Remove all punctuation and spaces
)
# Join fb with county_region to map county to region
fb <- fb %>%
left_join(county_region, by = c("county" = "county_id")) %>%
rename(self_region = region_name)  # Rename region_name to self_region
# create binary variable: 1==self-region matches any of ad_region, region1-3, 0 otherwise
fb <- fb %>%
mutate(
region_match = ifelse(
(self_region == ad_region & !is.na(ad_region)) |
(self_region == region1 & !is.na(region1)) |
(self_region == region2 & !is.na(region2)) |
(self_region == region3 & !is.na(region3)),
1,  # Set to 1 if there's a match
0   # Set to 0 if there's no match
)
)
sum(fb$region_match)/nrow(fb) #70% match on region location (self-reported and ad targets)
## load dataset that includes respondents who didn't finish the survey (who are excluded from analysis above) ##
fb.all <- read.csv("data sets/kenya_facebook_plusnonresponse.csv")
## calculate attrition rate (of the ppl who consented to survey)
table(fb.all$consent) # everyone in dataframe consented
# define attritors as finished in <5 min OR didn't finish at all
fb.all$attrit <- ifelse(fb.all$finished==0 | fb.all$finished==1 & as.numeric(fb.all$Duration..in.seconds.)/60<5, 1, 0)
sum(fb.all$attrit)/nrow(fb.all) #25% attrition
## regression of demo vars predicting attrition ##
nonresp.reg <- lm(attrit ~ ad_old + ad_male, data = fb.all)
sumnonresp <- summary(nonresp.reg)
xtable(nonresp.reg)
coefficients_summary <- sumnonresp$coefficients
# Extract the standard error
SE_beta_hat <- coefficients_summary[, "Std. Error"]
# Extract the estimate
beta_hat <- coefficients_summary[, "Estimate"]
# t critical value for 95% CI, you can get this from qt() function
# Degrees of freedom (df) can be extracted from the model
df <- sumnonresp$df[2]
alpha <- 0.05
t_critical <- qt(1 - alpha/2, df)
# Calculate the margin of error
margin_of_error <- t_critical * SE_beta_hat
# Lower and upper bounds
conf.low <- beta_hat - margin_of_error
conf.high <- beta_hat + margin_of_error
att_tab <- as.data.frame(cbind(coefficients_summary, conf.low, conf.high, df))
names(att_tab) <- tolower(names(att_tab))
## data export for figure 3: predictors of non-response ####
write.csv(att_tab,paste0(data_exports,"fig3_attritionreg_kenya.csv"))
## save ##
# Subset data for relevant categories
subset_save <- fb[fb$asian_save %in% c("Program A", "Program B"), ]
# Calculate weighted counts
weighted_save <- with(subset_save, tapply(weight_trimmed, asian_save, sum))
# Calculate proportions
weighted_prop_save <- weighted_save / sum(weighted_save)
## die ##
# Subset data for relevant categories
subset_die <- fb[fb$asian_die %in% c("Program A", "Program B"), ]
# Calculate weighted counts
weighted_die <- with(subset_die, tapply(weight_trimmed, asian_die, sum))
# Calculate proportions
weighted_prop_die <- weighted_die / sum(weighted_die)
att.vars <- c("protest",
"raise",
"attendmtg",
"voted",
"closeparty",
"approvepres",
"freetosay")
# protested - 11C
ab$protest <- ifelse(ab$Q11C %in% c(2,3,4),1,
ifelse(ab$Q11C %in% c(8,9),NA,0))
# raise issue - 11B
ab$raise <- ifelse(ab$Q11B %in% c(2,3,4),1,
ifelse(ab$Q11B %in% c(8,9),NA,0))
# attend mtg - 11A
ab$attendmtg <- ifelse(ab$Q11A %in% c(2,3,4),1,
ifelse(ab$Q11A %in% c(8,9),NA,0))
# voted - 13
ab$voted <- ifelse(ab$Q13 == 3,1,
ifelse(ab$Q13 %in% c(1,2,9),NA,0))
# close to pol party - 91A
ab$closeparty <- ifelse(ab$Q91A == 1,1,
ifelse(ab$Q91A %in% c(-1,8,9),NA,0))
# approve of pres - 51A **same wording for Q but different answer options - may need to drop and replace w/another Q**
ab$approvepres <- ifelse(ab$Q51A %in% c("4","3"),1,
ifelse(ab$Q51A %in% c(8,9),NA,0))
# free to speak (say what you think) - 10A
# code as binary
ab$freetosay <- ifelse(ab$Q10A %in% c(3,4),1, #3-somewhat, 4 - completely
ifelse(ab$Q10A %in% c(9),NA,0))
fb <- fb %>% mutate(
protest = ifelse(engage_diss_protest %in% c("Yes, often","Yes, several times","Yes, once or twice"),1,
ifelse(engage_diss_protest == -99,NA,0)),
raise = ifelse(engage_activity_issu %in% c("Yes, often","Yes, several times","Yes, once or twice"),1,
ifelse(engage_activity_issu == -99,NA,0)),
attendmtg = ifelse(engage_activity_mtg %in% c("Yes, often","Yes, several times","Yes, once or twice"),1,
ifelse(engage_activity_mtg == -99,NA,0)),
voted = ifelse(voted == "I voted",1,ifelse(voted == -99, NA, 0)),
closeparty = ifelse(polparty == "Yes",1,0),
approvepres = ifelse(pol_pres %in% c("Somewhat approve","Strongly approve"),1,
ifelse(pol_pres == -99,NA,0)),
freetosay = ifelse(pol_free %in% c("Somewhat free","Completely free"),1,0)
)
# fb attitude table
fb.att.weighted <- calculate_weighted_stats(fb, att.vars, "weight_trimmed")
fb.att.weighted$sample <- "Facebook Sample (weighted)"
names(fb.att.weighted)
fb.att <- fb %>%
select(all_of(att.vars)) %>%
map_df(~ tibble(
mean = my.mean(.x),
se = my.se(.x),
ci.low = mean - 1.96*se,
ci.high = mean + 1.96*se
), .id = "variable")
fb.att$sample <- "Facebook Sample"
# calculate ab demographic mean/se
ab.att <- ab %>%
select(all_of(att.vars)) %>%
map_df(~ tibble(
mean = my.mean(.x),
se = my.se(.x),
ci.low = mean - 1.96*se,
ci.high = mean + 1.96*se
), .id = "variable")
ab.att$sample <- "Afrobarometer"
##  calculate weighted average for Afrobarometer data ##
ab.att.weighted <- calculate_weighted_stats(data = ab, var_names = att.vars, weight_col = "withinwt_hh", ids = ~1)
ab.att.weighted$sample <- "Afrobatometer (weighted)"
att.all <- rbind(fb.att,fb.att.weighted, ab.att,ab.att.weighted)
## output data underlying figure 4: political behavior. ####
## figure is created in Python script
write_csv(att.all,paste0(data_exports,"fig4_behavior_kenya.csv"))
